* Housekeeping: close a log left open by a previous run (no error if none is
* open), turn off -more- pauses, and start from an empty session.
capture log close
set more off
clear all

* Establish working directory ************************************************
* The global macro workdirectory must already hold the project root; every
* relative path below is resolved against it.
cd "$workdirectory"

*-------------------------------------------------------------------------------
* Creating shares of subdistricts located within a FUA
*
* Result: processed_datasets\dataset_maps_and_cities_adm3_classwk holds the
* intersection records that matched a district-area record, plus
* share = area_calcu / area_subsubdist.

* Importing intersection of FUAs and districts
* geolevel3 is the row total of the ipum* code columns.
*   - option missing: the total is missing (not 0) when every code column is
*     missing, so records without a code are not given a fake code 0.
*   - type double: keeps long integer codes exact, which the default float
*     storage type cannot guarantee above 16,777,216.
import delimited raw_datasets\Maps\intersection_adm3.csv, encoding(UTF-8) clear
egen double geolevel3 = rowtotal(ipum2014 ipum2013 ipum2007), missing
save "processed_datasets\dataset_maps_and_cities_adm3_classwk", replace

* Importing area of districts
* We drop observations without geolevel3.
* NOTE(review): the intent noted for this step also included dropping
* geolevel3 == 888888, but no statement does so - confirm whether it is wanted.
import delimited raw_datasets\Maps\area_subsubdist.csv, encoding(UTF-8) clear
egen double geolevel3 = rowtotal(ipum2014 ipum2013 ipum2007), missing
keep geolevel3 cntry_code area_subsubdist
drop if missing(geolevel3)
tempfile area_subsubdist
save `area_subsubdist', replace

* Merging and obtaining shares
* Only records present in both files are kept; the area file must be unique on
* (cntry_code, geolevel3) or -merge m:1- stops with an error.
use "processed_datasets\dataset_maps_and_cities_adm3_classwk", clear
merge m:1 cntry_code geolevel3 using `area_subsubdist', keep(match) nogenerate

* Share of the district area that lies inside the intersection
gen share = area_calcu/area_subsubdist
sort cntry_code geolevel3
save "processed_datasets\dataset_maps_and_cities_adm3_classwk", replace

*-------------------------------------------------------------------------------
* Generating industry per city data
*
* Stacks every .csv found in the class-of-worker folder into one temporary
* dataset, keeps the "Weighted N" rows, and adds geolevel3 and cntry_code.

* Generating empty file to attach data to
clear all
tempfile data_classwk_cities_adm3
save `data_classwk_cities_adm3', emptyok replace

* Generating local with all .csv in the folder
* Forward slashes are used because a backslash placed directly before a local
* macro reference would stop the macro from being expanded.
local classwk_dir "raw_datasets/Maps/classwk_maps_adm3"
local files : dir "`classwk_dir'" files "*.csv"

* Stop with a clear message instead of failing later on a missing variable
if `"`files'"' == "" {
  display as error "no .csv files found in `classwk_dir'"
  exit 601
}

* Importing, appending and saving
* Each file is read through its full path, so the working directory is never
* changed and cannot be left in the wrong place if an import fails.
foreach fi of local files {
  import delimited "`classwk_dir'/`fi'", varnames(12) rowrange(13) encoding(UTF-8) clear
  * File name without the "_classwk.csv" suffix
  gen country_year = subinstr("`fi'","_classwk.csv","",.)
  append using `data_classwk_cities_adm3'
  save `data_classwk_cities_adm3', replace
}

* Cleaning
keep if v1 == "Weighted N"
drop v1
rename v2 census
drop if census == "COL TOTAL"

* Extracting geolevel3: first run of digits found in the census label
gen geolevel3 = regexs(0) if(regexm(census, "[0-9]+"))
destring geolevel3, replace

* Generating country codes (files not listed below keep code 0)
gen cntry_code = 0
replace cntry_code = 710 if country_year == "southafrica_2007"
replace cntry_code = 686 if country_year == "senegal_2013"
*replace cntry_code = 104 if country_year == "myanmar_2014"

sort cntry_code geolevel3
save `data_classwk_cities_adm3', replace


********************************************************************************


*-------------------------------------------------------------------------------
* Generating final dataset

* Joining both datasets: every map record is paired with every census record
* that has the same cntry_code and geolevel3
use "processed_datasets\dataset_maps_and_cities_adm3_classwk", clear
joinby cntry_code geolevel3 using `data_classwk_cities_adm3'

* Fixing country names from both datasets
replace cntry_na_1 = "South Africa" if cntry_na_1 == "SouthAfrica"

* Drop intersections which associate districts from country i to a city located in country j
drop if cntry_name != cntry_na_1

* Applying shares: each count gets an underscore-prefixed copy scaled by share
foreach count of varlist niunotinuniverse selfemployed wagesalaryworker unpaidworker unknownmissing rowtotal other {
	gen _`count' = `count'*share
}

* Collapsing by city and census
collapse (sum) _* (mean) fua_p_2015 (first) efua_name, by(efua_id country_year)

* Category shares; the denominator is the row total net of "not in universe"
foreach scaled of varlist _selfemployed _wagesalaryworker _unpaidworker _unknownmissing _other {
	gen share`scaled' = `scaled'/(_rowtotal-_niunotinuniverse)
}

save "processed_datasets\dataset_maps_and_cities_adm3_classwk", replace

